*********************************************************************************
clear all
set maxvar 30000
version 14
capture log close
set more off
set seed 20082024


****************************************************************************************************
* -----   Customize the paths and options:   ----- 
****************************************************************************************************

global MY_IN_PATH   "C:\Users\Joel Stiebale\Dropbox\RnD_tax_credit\ReStat_repl_package\Data"
global MY_OUT_PATH  "C:\Users\Joel Stiebale\Dropbox\RnD_tax_credit\ReStat_repl_package\Data"

global MY_OUT_FILE  ${MY_OUT_PATH}out.dta
global MY_LOG_FILE  ${MY_OUT_PATH}cr_out.log

****************************************************************************************************
* import CS data
****************************************************************************************************
/*
use "${MY_IN_PATH}/patents_class_count_hhi_gvkey.dta", clear   
bys gvkey ayear : gen i = _N
keep if i==1 
destring gvkey, replace
rename ayear year 
tempfile pathhi
save `pathhi'
*/

use "${MY_IN_PATH}/Compustat_complete1950to201511_selecteditems_n.dta", clear

* merge CPI
merge n:1 year using "${MY_IN_PATH}/cpi.dta"

keep if _merge ==3
drop _merge
sum cpi

replace cpi = cpi/100
sum cpi

gen lrd_def = log(xrd/cpi)
replace lrd_def = 0 if lrd_def ==.


* merge new tech prox measure 
destring gvkey, replace
merge 1:1 gvkey year using "${MY_IN_PATH}/techprox_newptas20221003_pdate.dta"

drop if _merge == 2
drop _merge

/*
merge 1:1 gvkey year using `pathhi'

drop if _merge == 2
drop _merge

recode *uspc(.=0)
*/

foreach var of varlist no_new_cl5_2020 npat_new5_50_2020 npat2020 npat_old5_2020 {
replace `var' = 0 if `var' ==.
}

gen tp5y = 1-tp_raw5_2020 
gen lnpat = log(npat2020+1)

statastates, abbreviation(state)
keep if _merge == 3
drop _merge

ren state_fips fips

*merge wilson tax credits
merge n:1 fips year using "${MY_IN_PATH}/rd_tax_credit_det.dta"
keep if _merge ==3 
drop _merge

* Merge CPI
merge m:1 year using "${MY_IN_PATH}/cpi.dta", nogenerate keep(master matched)	

*******************************************************************************************************************
* Sample preparation
*******************************************************************************************************************
drop h hh hhh hhhh

replace xrd = 0 if xrd ==.
drop if xrd < 0

bys gvkey year (datadate): drop if _n!=1

destring sic, replace
drop if sic >= 6000 & sic <= 6999 // drop financials

drop if year > 2006

* only firms that patent
gegen h = sum(npat2020), by(gvkey)
drop if h == 0
drop h

gen rd_d = xrd/cpi
gen xrd_d = rd_d

egen _first_tax = min(year) if rd_efr_hi>0 & rd_efr_hi<., by(fips)
recode _first_tax (.=0)

egen first_tax = max(_first_tax), by(fips)

recode npat*old* (.=0)
recode npat*new* (.=0)

gen no_patold_cl_1000 = min(1000, npat_old5_50_2020) if npat_old5_50_2020!=.
gen npat_alt_1000 = npat_new5_50_2020 + no_patold_cl_1000

gen fr_npat_new5_w = npat_new5_2020 / npat_alt_1000
capture drop fr_new
gen fr_new = npat_new5_50_2020 / npat_alt_1000

* Patent dummies
gen dpalt = npat_alt_1000 > 0
gen dpnew =  npat_new5_2020 > 0
gen dpnewtech =  npat_new5_50_2020 > 0
gen dpold = no_patold_cl_1000 > 0

capture drop xrd_1000
* R&D also winsorized at 1000
sum xrd_d, d
gen xrd_1000 = min(1000, xrd_d) if xrd_d!=.

gen drd = xrd_d>0 & xrd_d<.
replace drd=. if xrd_d==.

gen sid = fips

capture drop l0rd_efr_hi
gen  l0rd_efr_hi =  rd_efr_hi

* no tax dummy, no profit dummies
gen no_tax = txt<=0
replace no_tax = . if txt==.

gen no_ebit = ebit/cpi<=0 
replace no_ebit = . if ebit==.

gen no_profit = gp/cpi <=0
replace no_profit = . if gp==.

*rename count_uspc  uspc_count

* Aggregate profit and tax variables to the firm-level using observations before the first tax credit (1982)
foreach x in  no_tax  no_ebit  no_profit {
gegen _total_`x'_i = sum(`x') if year<1982, by(gvkey)
gegen total_`x'_i  = max(_total_`x'_i ), by(gvkey)
drop _total_`x'_i 
gegen av_`x' = mean(`x') if year<1982, by(sic)
gegen av_`x'_ = min(av_`x'), by(sic)
replace av_`x'  = av_`x'_
drop av_`x'_ 
capture drop c count*
gen c = av_`x'!=.
egen count=sum(c), by(sic)
egen count_i = sum(c), by(gvkey)
egen `x'_i = sum(`x'), by(gvkey)
gen Av_`x' = av_`x'
replace Av_`x' = av_`x' * count / (count-count_i)  - `x'_i  / (count-count_i) if `x'_i !=.
sum Av_`x', d
gen dhi_`x' = 1 if Av_`x' > r(p50) & Av_`x' !=.
replace dhi_`x' = 0 if dhi_`x' ==. & Av_`x' !=.
sum dhi_`x'
}

gegen h = count(1) if year < 1982, by(sic)
gegen n = min(h), by(sic)

gen sic2 = substr(string(sic),1,2)
gen sic3 = substr(string(sic),1,3)

egen _av_profit_i = mean(gp/cpi) if year<1982, by(gvkey)
egen _sd_profit_i = sd(gp/cpi) if year<1982, by(gvkey)
egen av_profit_i = max(_av_profit_i), by(gvkey)
egen sd_profit_i = max(_sd_profit_i), by(gvkey)
gen unc_i = sd_profit_i / abs(av_profit_i)
egen unc_j = mean(unc_i), by(sic3)

* Unc_j takes out the focal firm of the uncertainty measure: 
* average uncertainty by industy is: unc_j = 1/N  \sum_{i=1}^N unc_i
* we want to measure uncertainty without firm i: Unc_j = (unc_j) * N/(N-K) - unc_i *K / (N-K) 
* where N is the number if observations in an industry and K are the number of non-missing observations for firm i
capture drop c count*
gen c = unc_i!=.
egen count=sum(c), by(sic)
egen count_i = sum(c), by(gvkey)
gen Unc_j = unc_j
replace Unc_j = unc_j * count / (count-count_i)  - unc_i *count_i / (count-count_i) if unc_i!=.

drop if year < 1977

la var xrd_1000 "R&D"
la var npat_alt_1000 "Patents"
la var no_patold_cl_1000 "Old tech"
la var npat_new5_2020 "New class"
la var npat_new5_50_2020 "New tech"
la var tp_raw5_2020 "tech.proximity"
la var dpold "Doldtech"
la var dpnew "Dnewclass"
la var dpnewtech "Dnewtech"
la var xrd_d "R&D"


* treatment dummy
capture drop treat
capture drop l1_treat

gen treat = rd_efr_hi>0 
la variable treat "Tax credit event"

gen l1_treat = l1rd_efr_hi >0

gen roa = ebit / at
gen lass = ln(at)	
gen lnsale = ln(sale)
foreach v in roa at sale {
sum `v', d
replace `v' = r(p1) if `v'<r(p1) 
replace `v' = r(p99) if `v'>r(p99) 
}


*  use only significant changes
capture drop l1_streat
gen l1_streat = l1_treat
replace l1_streat = 0 if state=="WI" & year<1991
replace l1_streat = 0 if state=="IA" & year<1991
replace l1_streat = 0 if state=="CA" & year<1991
replace l1_streat = 0 if state=="IN" & year<1991

capture drop streat
gen streat = treat
replace streat = 0 if state=="WI" & year<1990
replace streat = 0 if state=="IA" & year<1990
replace streat = 0 if state=="CA" & year<1990
replace streat = 0 if state=="IN" & year<1990

global cohort2 first_taxB	

capture  drop first_taxB
gen first_taxB = first_tax
replace first_taxB = 1990 if state=="WI" | state=="IN" | state=="IA" | state=="CA" 

* Define binary treatment and event studies					
 * define individual & time FE & cohort cluster
global idcode gvkey 
global year year
global cluster sid
* define cohort variable, non-treated 0 (to be generated)
global cohort first_tax
* enter first pre and last post-treatment period
global first 5 
global last  15

egen treatgroup = max(treat) if first_tax<=1991, by(sid)
gen treat_before = treatgroup*(1-treat)
gen treat_after = treatgroup*treat
gen usercost = rho_high - 1

* drop 2 states with <=2 firms 
drop if state=="WV" | state=="ND"
*log using "${MY_OUT_PATH}\BFSV_main_tables_240905.log", replace

*******************************************************************************************************************
* Estimation and tables
*******************************************************************************************************************

*******************************************************************************************************************
* *Table 1: descriptive statistics, * treatment and control group
*******************************************************************************************************************

tabstat rd_efr_hi rd_nom_hi usercost xrd_d npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020  fr_new at roa sale if first_tax<=1991, stats(mean sd) columns(stats) format(%9.3f)
tabstat rd_efr_hi rd_nom_hi usercost xrd_d npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020  fr_new at roa sale if treat_before==1 & first_tax<=1991, stats(mean sd) columns(stats) format(%9.3f)
tabstat rd_efr_hi rd_nom_hi usercost xrd_d npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020  fr_new at roa sale if treat_after==1 & first_tax<=1991, stats(mean sd) columns(stats) format(%9.3f)
tabstat rd_efr_hi rd_nom_hi usercost xrd_d npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020  fr_new at roa sale if treatgroup==0 & first_tax<=1991, stats(mean sd) columns(stats) format(%9.3f)

*******************************************************************************************************************

*******************************************************************************************************************
* Table 2: baseline estimates. Poisson, continuous treatment
*******************************************************************************************************************

qui ppmlhdfe xrd_d l1rd_efr_hi if first_tax<=1991 ,  absorb(gvkey year) cluster(sid) d
estimates store a
scalar ra_xrd_d = e(r2_p)
qui ppmlhdfe npat_alt_1000 l1rd_efr_hi if first_tax<=1991  ,  absorb(gvkey year) cluster(sid)	d				
estimates store b
scalar ra_npat_alt_1000 = e(r2_p)
qui ppmlhdfe  no_patold_cl_1000  l1rd_efr_hi if first_tax<=1991 ,  cluster(sid) absorb(gvkey year)	 d				
est store c
scalar ra_no_patold_cl_1000 = e(r2_p)
qui ppmlhdfe  npat_new5_50_2020  l1rd_efr_hi if first_tax<=1991 ,  cluster(sid) absorb(gvkey year)		d			
est store d
scalar ra_npat_new5_50_2020 = e(r2_p)

esttab a b c d , replace pr2 se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01) scalars(N_full) ///
					keep (l1rd_efr_hi) sfmt(%10.0f) ///
					title("Tax credits, R&D and patenting, Poisson / exponential mean regression")  label noobs	type nogap compress noeqli addn("Standard errors clustered at the state level" "All regression include firm and year fixed effecs") coeflabels(l1rd_efr_hi "Tax credit t-1") 				

* generate counts for number of observations and firms
foreach y in  xrd_d npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020 {
qui poisson `y'  l1rd_efr_hi if first_tax<=1991, cluster(sid)  
gen fullsample_a=e(sample)
egen Nsample = sum(fullsample_a)
qui sum Nsample
di "N  for `v' : `r(max)'"
egen id_b=group(gvkey ) if fullsample_a==1
qui sum(id_b)
di "N firms for `y' : `r(max)'"
drop Nsample fullsample_a  id_b
di "Pseudo R2:" ra_`y' 
}		


*************************************************************************************************************************************
* define relative time indicators
capture drop ry
gen ry = year - $cohort2

forvalues k = 1/$first {
capture drop g_`k'
gen g_`k' = ry == -1 * `k'
la var g_`k' "-`k'"
}

forvalues k = 0/$last {
capture drop g`k'
gen g`k' = ry == `k'
la var g`k' "`k'"
}


global varlistpre
global varlistpost
global varlistpostl
global vpre
global vpost
global vpostl 

 
forvalues k = $first (-1)2 {
global varlistpre $varlistpre i.$cohort2#c.g_`k'
global vpre $vpre g_`k'
}

forvalues k = 0/ $last {
global varlistpost $varlistpost i.$cohort2#c.g`k'
global vpost $vpost g`k'
}

forvalues k = 1/ $last {
global varlistpostl $varlistpostl i.$cohort2#c.g`k'
global vpostl $vpostl g`k'
}
					
*************************************************************************************************************************************
* Table 3, Panel A
*************************************************************************************************************************************
*************************************************************************************************************************************
* Wooldridge estimator
*************************************************************************************************************************************

foreach y in xrd_d npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020 {
qui ppmlhdfe `y'    $varlistpostl  if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0) , ///
 absorb(gvkey year) vce(cluster sid) keepsingle 

scalar rb_`y' = e(r2_p)
capture drop wsample
gen wsample = e(sample)
 
qui {
count if wsample==1 & year>$cohort2 & $cohort2!=0
local N = r(N)
local att 0
foreach v in  $vpostl  {
count if wsample==1 & `v'==1 & year>$cohort2 & $cohort2!=0
levelsof $cohort2 if wsample==1 & year>$cohort2 & $cohort2!=0, local(level)
foreach w in `level' {
count if wsample ==1 & `v'==1 & $cohort2 ==`w'
local s = r(N)
local att `att' + _b[`w'.$cohort2#c.`v']*`s' /`N'
					}
						}
  }
nlcom (att: `att' ) , post
est store e_`y'					
}					
				
esttab e_xrd_d e_npat_alt_1000 e_no_patold_cl_1000 e_npat_new5_50_2020 , replace  se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01)  noobs ///
					title("Panel A: Tax credits, R&D and patenting, Wooldridge estimator")  label	type nogap compress noeqli addn("Standard errors clustered at the state level" "All regression include firm and year fixed effects") coeflabels(l1rd_efr_hi "TaxCreditRate"  att "TaxCreditEvent" ) 				

* generate counts for number of observations and firms
foreach y in  xrd_d npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020 {
qui poisson `y'  l1rd_efr_hi if first_tax<=1991, cluster(sid)  
gen fullsample_a=e(sample)
egen Nsample = sum(fullsample_a)
qui sum Nsample
di "N  for `v' : `r(max)'"
egen id_b=group(gvkey ) if fullsample_a==1
qui sum(id_b)
di "N firms for `y' : `r(max)'"
drop Nsample fullsample_a  id_b
di "Pseudo R2:" ra_`y' 
}							
					
*************************************************************************************************************************************
* Table 3, Panel B 
*************************************************************************************************************************************
*************************************************************************************************************************************
* Poisson 2-way-FE
**************************************************************************************************************************************
					
qui ppmlhdfe xrd_d l1_streat if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0) & first_tax<=1991 ,  absorb(gvkey year) cluster(sid)
estimates store a
scalar rb_xrd_d = e(r2_p)
qui ppmlhdfe npat_alt_1000 l1_streat if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0) & first_tax<=1991 ,  absorb(gvkey year) cluster(sid)					
estimates store b
scalar rb_npat_alt_1000 = e(r2_p)
sum l1rd_efr_hi if l1_streat==1 & e(sample)
qui ppmlhdfe  no_patold_cl_1000  l1_streat if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0) & first_tax<=1991 ,    cluster(sid) absorb(gvkey year)					
est store c
scalar rb_no_patold_cl_1000 = e(r2_p)
qui ppmlhdfe  npat_new5_50_2020  l1_streat if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0) & first_tax<=1991 ,    cluster(sid) absorb(gvkey year)					
est store d					
scalar rb_npat_new5_50_2020 = e(r2_p)

esttab a b c d , replace	pr2 se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01) scalars(N_full) sfmt(%10.0f) noobs  ///
					drop(_cons) title("Panel B: Tax credits, R&D and patenting, two way FE")  label	type nogap compress noeqli addn("Standard errors clustered at the state level" "All regression include firm and year fixed effects") coeflabels(l1rd_efr_hi "TaxCreditRate"  l1_streat "TaxCreditEvent" ) 				

* generate counts for number of observations and firms
foreach y in xrd_d npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020 {
qui poisson `y'  $varlistpostl  if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0) , cluster(sid)  
gen fullsample_a=e(sample)
egen Nsample = sum(fullsample_a)
qui sum Nsample
di "N  for `v' : `r(max)'"
egen id_b=group(gvkey ) if fullsample_a==1
qui sum(id_b)
di "N firms for `y' : `r(max)'"
drop Nsample fullsample_a  id_b
di "Pseudo R2:" rb_`y' 
}	


********************************************************************************************************************					
* Table 4: within firm-state regressions
********************************************************************************************************************							
* see do file rndtax_credit_within_firms.do
	
********************************************************************************************************************					
* * Table 5: Heterogenuous treatment effects by uncertainty
********************************************************************************************************************					

* construct uncertainty measures
capture drop d_high_Unc
capture drop l1rd_efr_hi_unc
qui poisson npat_alt_1000 l1rd_efr_hi Unc_j if first_tax<=1991,  cluster(sid)					
capture drop patsample
gen patsample = e(sample)
sum Unc_j if patsample, d 
gen d_high_Unc = Unc_j>r(p50) & Unc_j<.
replace d_high_Unc = . if Unc_j==.

gen l1rd_efr_hi_unc = l1rd_efr_hi*d_high_Unc
		
capture drop dmUnc_j
qui sum Unc_j if patsample, d
gen dmUnc_j = Unc_j - r(mean)
capture drop l1rd_efr_hi_dmUnc_j
gen l1rd_efr_hi_dmUnc_j = l1rd_efr_hi * dmUnc_j
					
* some numbers in the text: 
tab no_ebit if patsample
tab no_ebit if patsample & d_high_Unc==1
tab no_ebit if patsample & d_high_Unc==0
tab no_tax  
tab no_tax if no_ebit ==1
tab no_tax if no_ebit ==0
corr no_tax no_ebit 

*************************************************************************************************************************************
*Summary stats uncertainty variables for Table 1
tabstat  dmUnc_j  d_high_Unc if first_tax<=1991, stats(mean sd) columns(stats) format(%9.3f)
tabstat  dmUnc_j  d_high_Unc  if treat_before==1 & first_tax<=1991, stats(mean sd) columns(stats) format(%9.3f)
tabstat  dmUnc_j  d_high_Unc  if treat_after==1 & first_tax<=1991, stats(mean sd) columns(stats) format(%9.3f)
tabstat  dmUnc_j  d_high_Unc  if treatgroup==0 & first_tax<=1991, stats(mean sd) columns(stats) format(%9.3f)
*************************************************************************************************************************************

*************************************************************************************************************************************
* Table 5, Panel A: continuous R&D tax credit exposure X Uncertainty
*************************************************************************************************************************************

ppmlhdfe  no_patold_cl_1000   l1rd_efr_hi  l1rd_efr_hi_unc if first_tax<=1991  , cluster(sid) absorb(gvkey year)					
est store a
scalar ra_no_patold_cl_1000 = e(r2_p)
ppmlhdfe  npat_new5_50_2020  l1rd_efr_hi  l1rd_efr_hi_unc if first_tax<=1991  , cluster(sid) absorb(gvkey year)					
est store b
scalar ra_npat_new5_50_2020 = e(r2_p)
ppmlhdfe  no_patold_cl_1000   l1rd_efr_hi l1rd_efr_hi_dmUnc_j  if first_tax<=1991  , cluster(sid) absorb(gvkey year)					
est store c
scalar rb_no_patold_cl_1000 = e(r2_p)
ppmlhdfe  npat_new5_50_2020  l1rd_efr_hi  l1rd_efr_hi_dmUnc_j if first_tax<=1991 , cluster(sid) absorb(gvkey year)					
est store d
scalar rb_npat_new5_50_2020 = e(r2_p)

 esttab a b c d , 	replace pr2 se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01) scalars(N_full) sfmt(%10.0f) ///
									drop(_cons) title("Tax credits and patents, varying degree of uncertainty")  label	type nogap compress noeqli ///
					addn("Standard errors clustered at the state level" "All regression include firm and year fixed effecs") coeflabels(l1rd_efr_hi "Tax credit t-1" l1rd_efr_hi_dmUnc_j "Tax credit x Unc" l1rd_efr_hi_unc "Tax credit D(Unc>median)") 									

* generate counts for number of observations and firms
foreach y in  no_patold_cl_1000 npat_new5_50_2020 {
qui poisson `y'   l1rd_efr_hi  l1rd_efr_hi_unc if first_tax<=1991  , cluster(sid)  
gen fullsample_a=e(sample)
egen Nsample = sum(fullsample_a)
qui sum Nsample
di "N  for `v' : `r(max)'"
egen id_b=group(gvkey ) if fullsample_a==1
qui sum(id_b)
di "N firms for `y' : `r(max)'"
drop Nsample fullsample_a  id_b
di "Pseudo R2:" ra_`y' 
}					

foreach y in  no_patold_cl_1000 npat_new5_50_2020 {
qui poisson `y'  l1rd_efr_hi  l1rd_efr_hi_dmUnc_j if first_tax<=1991  , cluster(sid)  
gen fullsample_a=e(sample)
egen Nsample = sum(fullsample_a)
qui sum Nsample
di "N  for `v' : `r(max)'"
egen id_b=group(gvkey ) if fullsample_a==1
qui sum(id_b)
di "N firms for `y' : `r(max)'"
drop Nsample fullsample_a  id_b
di "Pseudo R2:" rb_`y' 
}		
					
*************************************************************************************************************************************
*************************************************************************************************************************************

			
foreach v in r_d_intensity _av_rnd_int_i _av_rnd_int_i_2 total_av_rnd_int_i_2 high_rnd  dm_rnd_int l1_streat_hi_unc l1_streat_dmUnc_j{
capture drop `v'
}

gen l1_streat_hi_unc = l1_streat*d_high_Unc
gen l1_streat_dmUnc_j = l1_streat* dmUnc_j

global unvarlistpostl i.$cohort2#c.l1_streat#c.d_high_Unc

*************************************************************************************************************************************
* Table 5, Panel C: * binary R&D tax credit exposure X Uncertainty, Wooldridge estimator
*************************************************************************************************************************************	

********************************************************************************
* binary uncertainty measure
********************************************************************************
 
global unvarlistpostl i.$cohort2#c.l1_streat#c.d_high_Unc
 
foreach y in no_patold_cl_1000 npat_new5_50_2020 {
qui ppmlhdfe `y'   $varlistpostl $unvarlistpostl  if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0) , ///
 absorb(gvkey year) vce(cluster sid) keepsingle 

scalar ra_`y' = e(r2_p)
capture drop wsample
gen wsample = e(sample)
 
qui {
count if wsample==1 & year>$cohort2 & $cohort2!=0
local N = r(N)
local att 0
local attunc 0
foreach v in  $vpostl  {
count if wsample==1 & `v'==1 & year>$cohort2 & $cohort2!=0
levelsof $cohort2 if wsample==1 & year>$cohort2 & $cohort2!=0, local(level)
foreach w in `level' {
count if wsample ==1 & `v'==1 & $cohort2 ==`w'
local s = r(N)
local att `att' + _b[`w'.$cohort2#c.`v']*`s' /`N'
local attunc `attunc' + _b[`w'.$cohort2#l1_streat#c.d_high_Unc]*`s'/`N'
					}
						}
  }
nlcom (att: `att' ) (attunc: `attunc' ), post
est store u_`y'					
}					


********************************************************************************
* continuous uncertainty measure
********************************************************************************
  
global unvarlistpost2 i.$cohort2#c.l1_streat#c.dmUnc_j

foreach y in no_patold_cl_1000 npat_new5_50_2020 {
qui ppmlhdfe `y'   $varlistpostl $unvarlistpost2  if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0) , ///
 absorb(gvkey year) vce(cluster sid) keepsingle 

scalar rb_`y' = e(r2_p)
capture drop wsample
gen wsample = e(sample)
 
qui {
count if wsample==1 & year>$cohort2 & $cohort2!=0
local N = r(N)
local att 0
local attunce 0
foreach v in  $vpostl  {
count if wsample==1 & `v'==1 & year>$cohort2 & $cohort2!=0
levelsof $cohort2 if wsample==1 & year>$cohort2 & $cohort2!=0, local(level)
foreach w in `level' {
count if wsample ==1 & `v'==1 & $cohort2 ==`w'
local s = r(N)
local att `att' + _b[`w'.$cohort2#c.`v']*`s' /`N'
local attunce `attunce' + _b[`w'.$cohort2#l1_streat#c.dmUnc_j]*`s'/`N'
					}
						}
  }
nlcom (att: `att' ) (attunce: `attunce' ), post
est store ue_`y'					
}					

				
esttab u_no_patold_cl_1000 u_npat_new5_50_2020 ue_no_patold_cl_1000 ue_npat_new5_50_2020, replace  se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01)  noobs ///
					title("Tax credits and patenting, by levels of uncertainty")  label	type nogap compress noeqli addn("Standard errors clustered at the state level" "All regression include firm and year fixed effects") coeflabels(l1rd_efr_hi "TaxCreditRate"  att "TaxCreditEvent" attunce "...x Unc" attunc "...x D(Unc>median)") 				
 
foreach y in no_patold_cl_1000 npat_new5_50_2020 {
qui poisson `y'  $varlistpostl $unvarlistpostl  if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0) , cluster(sid)  
gen fullsample_a=e(sample)
egen Nsample = sum(fullsample_a)
qui sum Nsample
di "N  for `v' : `r(max)'"
egen id_b=group(gvkey ) if fullsample_a==1
qui sum(id_b)
di "N firms for `y' : `r(max)'"
drop Nsample fullsample_a  id_b
di "Pseudo R2:" ra_`y' 
}	
 
foreach y in no_patold_cl_1000 npat_new5_50_2020 {
qui poisson `y'  $varlistpostl $unvarlistpost2 if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0) , cluster(sid)  
gen fullsample_a=e(sample)
egen Nsample = sum(fullsample_a)
qui sum Nsample
di "N  for `v' : `r(max)'"
egen id_b=group(gvkey ) if fullsample_a==1
qui sum(id_b)
di "N firms for `y' : `r(max)'"
drop Nsample fullsample_a  id_b
di "Pseudo R2:" rb_`y' 
}					
					
	/*				
*************************************************************************************************************************************
*************************************************************************************************************************************
* Table 5, Panel B & D: Lasso regression with interaction Pr(no_ebit)=1
*************************************************************************************************************************************

gen r_d_intensity=xrd/sale
egen _av_rnd_int_i = mean(r_d_intensity) if year<=1986, by(gvkey)
bys gvkey: egen _av_rnd_int_i_2=mean(_av_rnd_int_i)
replace _av_rnd_int_i_2 = 1 if _av_rnd_int_i_2>1
egen total_av_rnd_int_i_2=mean(_av_rnd_int_i_2), by(sic3)	
replace total_av_rnd_int_i_2 = 1 if total_av_rnd_int_i_2>1
sum total_av_rnd_int_i_2 , d
gen high_rnd = total_av_rnd_int_i_2>r(p50) & total_av_rnd_int_i_2<.
gen dm_rnd_int = total_av_rnd_int_i_2 / r(mean)

gen cap_int = capx / sale 
egen _av_cap_int_i = mean(cap_int) if year<=1986, by(gvkey)
bys gvkey: egen _av_cap_int_i_2=mean(_av_cap_int_i)
egen total_av_cap_int_i_2=mean(_av_cap_int_i_2), by(sic3)	
sum total_av_cap_int_i_2, d
gen high_cap_int = total_av_cap_int_i_2>r(p50) & total_av_cap_int_i_2<.

gen cogs_sale = cogs / sale 
egen _av_cs_i = mean(cogs_sale) if year<=1986, by(gvkey)
bys gvkey: egen _av_cs_i_2=mean(_av_cs_i)
egen total_av_cs_i=mean(_av_cs_i_2), by(sic3)	
sum total_av_cs_i, d
gen high_cs = total_av_cs_i>r(p50) & total_av_cs_i<.

gen isic = .
replace isic = 1500 if sic2 == "20" // Food And Kindred Products
replace isic = 1700 if sic2 == "22" | sic2 == "23" // Textile Mill Products, Apparel And Other Finished Products Made From Fabrics And Similar Materials
replace isic = 2100 if sic2 == "26" // Paper And Allied Products
replace isic = 2200 if sic2 == "27" // Printing, Publishing, And Allied Industries
replace isic = 2320 if sic2 == "29" // Petroleum Refining And Related Industries
replace isic = 2400 if sic2 == "28" // Chemicals And Allied Products
replace isic = 2413 if sic3 == "282" //  Plastics Materials And Synthetic Resins, Synthetic
replace isic = 2423 if sic3 == "283" // Drugs
replace isic = 2429 if sic3 == "289" // Miscellaneous Chemical Products
replace isic = 2500 if sic2 == "30" // Rubber And Miscellaneous Plastics Products
replace isic = 2600 if sic2 == "32" // Stone, Clay, Glass, And Concrete Products
replace isic = 2610 if sic3 == "322"  | sic3 == "323"  //  Glass And Glassware, Pressed Or Blown ,, Glass Products, Made Of Purchased Glass
replace isic = 2695 if sic3 == "324" // Cement, Hydraulic
replace isic = 2700 if sic2 == "33" //  Primary Metal Industries
replace isic = 2710 if sic3 == "331" | sic3 == "332"  // Steel Works, Blast Furnaces, And Rolling And Finishing Mills, Iron And Steel Foundries
replace isic = 2800 if sic2 == "34" //  Fabricated Metal Products, Except Machinery And Transportation Equipment
replace isic = 2910 if sic3 == "356" // General Industrial Machinery And Equipment
replace isic = 2920 if sic3 == "355" // Special Industry Machinery, Except Metalworking
replace isic = 2922 if sic3 == "354" // Metalworking Machinery And Equipment
replace isic = 3010 if sic3 == "357" // Computer And Office Equipment
replace isic = 3100 if sic2 == "36" // Electronic And Other Electrical Equipment And Components, Except Computer Equipment
*replace isic = 3110 if sic3 == "371" // 
replace isic = 3210 if sic3 == "367" // Electronic Components And Accessories
replace isic = 3211 if sic == 3674 // Semiconductors and Related Devices
replace isic = 3220  if sic3 == "366" // Communications Equipment
replace isic = 3230 if sic == 3663 // Radio and Television Broadcasting and Communications Equipment
replace isic = 3311 if sic3 == "384" //Surgical, Medical, And Dental Instruments And Supplies
*replace isic = 3312 if sic2 == "" //
replace isic = 3314  if sic3 == "381" // Search, Detection, Navigation, Guidance, Aeronautical, and Nautical Systems, Instruments, and Equipment
replace isic = 3410 if sic3 == "371" // Motor Vehicles And Motor Vehicle Equipment
replace isic = 3430 if sic == 3714 // Motor Vehicle Parts and Accessories
replace isic = 3530 if sic3 == "372" // Aircraft And Parts
replace isic = 3600 if sic2 == "" //
* Other manufacturing
replace isic = 3600 if sic2 == "39" //  Miscellaneous Manufacturing Industries

merge n:1 isic using "${MY_IN_PATH}/cms_2000_table1.dta"
drop if _merge == 2

foreach v in patents secrecy leadtime {
capture drop i_`v' d_`v' m_`v' missing_`v'
gen missing_`v' = `v' ==. 
egen m_`v' = mean(`v')
gen i_`v' = `v'
replace i_`v' = m_`v' if `v'==.
qui sum `v', d
gen d_`v' = `v' > m_`v' & `v'!=.
}
				
foreach v in  _av_rnd_int_i_2 _av_cap_int_i_2  _av_cs_i_2 unc_i {
gen i`v' = `v'
gen missing`v' = i`v' ==.
qui sum `v', d 
gen high`v' = `v'>r(p50) & `v'!=.
replace i`v' = r(mean) if i`v'==.
}

local vlist total_av_cap_int_i_2   high_cap_int total_av_rnd_int_i_2 high_rnd d_high_Unc Unc_j total_av_cs_i high_cs ///
i_patents i_secrecy i_leadtime missing_patents  d_patents d_secrecy d_leadtime ///
 i_av_rnd_int_i_2 i_av_cap_int_i_2  i_av_cs_i_2 iunc_i high_av_rnd_int_i_2 high_av_cap_int_i_2  high_av_cs_i highunc_i  ///
missing_av_rnd_int_i_2 missing_av_cap_int_i_2 missing_av_cs_i_2 missingunc_i  

local quadratic_terms
local cubic_terms
foreach v of local vlist {
    local quadratic_terms `quadratic_terms' c.`v'#c.`v'
    local cubic_terms `cubic_terms' c.`v'#c.`v'#c.`v'
}

local interactions_2
local nvars: word count `vlist'
forvalues i = 1/`nvars' {
    local u: word `i' of `vlist'
    forvalues j = `=`i'+1'/`nvars' {
        local v: word `j' of `vlist'
        local interactions_2 `interactions_2' c.`u'#c.`v'
    }
}

lassologit no_ebit `vlist' `quadratic_terms'  `interactions_2' , lic(ebic) postresults

capture drop probhat
predict  probhat, pr

capture drop high_probhat  l1rd_efr_hi_probhat* 
qui poisson npat_alt_1000 l1rd_efr_hi probhat if first_tax<=1991, cluster(sid)	
capture drop psample2 
gen psample2 = e(sample)				
qui sum probhat if psample2, d
gen high_probhat = probhat>r(p50) & probhat<.
replace high_probhat=. if probhat==.
gen l1rd_efr_hi_probhat_high = l1rd_efr_hi*high_probhat
gen l1rd_efr_hi_probhat = l1rd_efr_hi*probhat

*************************************************************************************************************************************
*Summary stats profit risk for Table 1
tabstat  high_probhat  probhat if first_tax<=1991, stats(mean sd) columns(stats) format(%9.3f)
tabstat  high_probhat  probhat  if treat_before==1 & first_tax<=1991, stats(mean sd) columns(stats) format(%9.3f)
tabstat  high_probhat  probhat  if treat_after==1 & first_tax<=1991, stats(mean sd) columns(stats) format(%9.3f)
tabstat  high_probhat  probhat  if treatgroup==0 & first_tax<=1991, stats(mean sd) columns(stats) format(%9.3f)
*************************************************************************************************************************************

*************************************************************************************************************************************
* Table 5, Panel B: continuous tax credit exposure
*************************************************************************************************************************************

ppmlhdfe  no_patold_cl_1000   l1rd_efr_hi  l1rd_efr_hi_probhat_high  if first_tax<=1991 , cluster(sid) absorb(gvkey year )	keepsingle				
est store a
scalar ra_no_patold_cl_1000 = e(r2_p)
ppmlhdfe  npat_new5_50_2020  l1rd_efr_hi  l1rd_efr_hi_probhat_high  if first_tax<=1991 , cluster(sid) absorb(gvkey year )	keepsingle				
est store b
scalar ra_npat_new5_50_2020 = e(r2_p)
ppmlhdfe  no_patold_cl_1000   l1rd_efr_hi l1rd_efr_hi_probhat   if first_tax<=1991 , cluster(sid) absorb(gvkey year )	keepsingle				
est store c
scalar rb_no_patold_cl_1000 = e(r2_p)
ppmlhdfe  npat_new5_50_2020  l1rd_efr_hi  l1rd_efr_hi_probhat  if first_tax<=1991 , cluster(sid) absorb(gvkey year )	keepsingle				
est store d
scalar rb_npat_new5_50_2020 = e(r2_p)

esttab a b c d , 	replace  se b(%10.3f) pr2 starlevels(* 0.10 ** 0.05 *** 0.01) noobs scalars(N_full) ///
					drop(_cons) sfmt( %10.0f)  ///
					title("Tax credits and patenting, predicted prob of zero EBIT")  label	type nogap compress noeqli addn("Standard errors clustered at the state level" "All regression include firm and year fixed effects") 			
					
foreach y in no_patold_cl_1000 npat_new5_50_2020 {
qui poisson `y' l1rd_efr_hi  l1rd_efr_hi_probhat_high  if first_tax<=1991 , cluster(sid)  
gen fullsample_a=e(sample)
egen Nsample = sum(fullsample_a)
qui sum Nsample
di "N  for `v' : `r(max)'"
egen id_b=group(gvkey ) if fullsample_a==1
qui sum(id_b)
di "N firms for `y' : `r(max)'"
drop Nsample fullsample_a  id_b
di "Pseudo R2:" ra_`y' 
}	
 
foreach y in no_patold_cl_1000 npat_new5_50_2020 {
qui poisson `y'   l1rd_efr_hi l1rd_efr_hi_probhat if first_tax<=1991, cluster(sid)  
gen fullsample_a=e(sample)
egen Nsample = sum(fullsample_a)
qui sum Nsample
di "N  for `v' : `r(max)'"
egen id_b=group(gvkey ) if fullsample_a==1
qui sum(id_b)
di "N firms for `y' : `r(max)'"
drop Nsample fullsample_a  id_b
di "Pseudo R2:" rb_`y' 
}		
 
*************************************************************************************************************************************
* Table 5, Panel D
*************************************************************************************************************************************					
* Lasso probabilities and wooldridge estimator 
 *******************************************************************************
* continuous profit risk measure
********************************************************************************
 
global unvarlistpost2 i.$cohort2#c.l1_streat#c.probhat
 
foreach y in no_patold_cl_1000 npat_new5_50_2020 {
qui ppmlhdfe `y'   $varlistpostl $unvarlistpost2  if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0) , ///
 absorb(gvkey year) vce(cluster sid) keepsingle 

scalar ra_`y' = e(r2_p)
capture drop wsample
gen wsample = e(sample)
 
qui {
count if wsample==1 & year>$cohort2 & $cohort2!=0
local N = r(N)
local att 0
local attunc 0
foreach v in  $vpostl  {
count if wsample==1 & `v'==1 & year>$cohort2 & $cohort2!=0
levelsof $cohort2 if wsample==1 & year>$cohort2 & $cohort2!=0, local(level)
foreach w in `level' {
count if wsample ==1 & `v'==1 & $cohort2 ==`w'
local s = r(N)
local att `att' + _b[`w'.$cohort2#c.`v']*`s' /`N'
local attunc `attunc' + _b[`w'.$cohort2#l1_streat#c.probhat]*`s'/`N'
					}
						}
  }
nlcom (att: `att' ) (attunc: `attunc' ), post
est store u_`y'					
}					


********************************************************************************
* binary high profit risk measure
********************************************************************************
  
global unvarlistpost i.$cohort2#c.l1_streat#c.high_probhat

foreach y in no_patold_cl_1000 npat_new5_50_2020 {
qui ppmlhdfe `y'   $varlistpostl $unvarlistpost  if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0) , ///
 absorb(gvkey year) vce(cluster sid) keepsingle 

scalar rb_`y' = e(r2_p)
capture drop wsample
gen wsample = e(sample)
 
qui {
count if wsample==1 & year>$cohort2 & $cohort2!=0
local N = r(N)
local att 0
local attunce 0
foreach v in  $vpostl  {
count if wsample==1 & `v'==1 & year>$cohort2 & $cohort2!=0
levelsof $cohort2 if wsample==1 & year>$cohort2 & $cohort2!=0, local(level)
foreach w in `level' {
count if wsample ==1 & `v'==1 & $cohort2 ==`w'
local s = r(N)
local att `att' + _b[`w'.$cohort2#c.`v']*`s' /`N'
local attunce `attunce' + _b[`w'.$cohort2#l1_streat#c.high_probhat]*`s'/`N'
					}
						}
  }
nlcom (att: `att' ) (attunce: `attunce' ), post
est store ue_`y'					
}					

				
esttab ue_no_patold_cl_1000 ue_npat_new5_50_2020 u_no_patold_cl_1000 u_npat_new5_50_2020 , replace  se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01)  noobs ///
					title("Tax credits and patenting, by levels of profit risk")  label	type nogap compress noeqli addn("Standard errors clustered at the state level" "All regression include firm and year fixed effects") coeflabels(l1rd_efr_hi "TaxCreditRate"  att "TaxCreditEvent" attunc "..X Pr(zero-EBIT)" attunce "..X high Pr(zero-EBIT)") 				
 
foreach y in no_patold_cl_1000 npat_new5_50_2020 {
qui poisson `y'  $varlistpostl $unvarlistpost  if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0) , cluster(sid)  
gen fullsample_a=e(sample)
egen Nsample = sum(fullsample_a)
qui sum Nsample
di "N  for `v' : `r(max)'"
egen id_b=group(gvkey ) if fullsample_a==1
qui sum(id_b)
di "N firms for `y' : `r(max)'"
drop Nsample fullsample_a  id_b
di "Pseudo R2:" ra_`y' 
}	
 
foreach y in no_patold_cl_1000 npat_new5_50_2020 {
qui poisson `y'  $varlistpostl $unvarlistpost2 if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0) , cluster(sid)  
gen fullsample_a=e(sample)
egen Nsample = sum(fullsample_a)
qui sum Nsample
di "N  for `v' : `r(max)'"
egen id_b=group(gvkey ) if fullsample_a==1
qui sum(id_b)
di "N firms for `y' : `r(max)'"
drop Nsample fullsample_a  id_b
di "Pseudo R2:" rb_`y' 
}					
 
 
*************************************************************************************************************************************
* Histogram with predicted probabilities for Appendix: 
*************************************************************************************************************************************
					
hist probhat if patsample , bin(10) color(navy) graphregion(color(white)) xtit("Predicted probability of 0-EBIT")
graph export "${MY_OUT_PATH}/figures/hist10_0EBIT.png", as(png) replace
							
tab no_ebit if probhat!=. & probhat>0.5 & patsample==1
tab no_ebit if probhat!=. & probhat<=0.5 & patsample==1
  
 */
*************************************************************************************************************************************					
					
*************************************************************************************************************************************
* Table 6: Alternative measures
********************************************************************************					
* Alternative measures					
********************************************************************************
preserve

capture drop _merge				

merge 1:1 gvkey year using "${MY_IN_PATH}/DISCERN_Panel_Data_1980_2015.dta",					
drop if _merge ==2
drop _merge

gen pat_vs_sci = pat_yr/(pub_yr + pat_yr)
gen pat_per_sci = pat_yr/pub_yr			
keep if  pat_per_sci<500

merge 1:1 gvkey year using "${MY_IN_PATH}/bcites_20231228.dta" 
drop if _merge ==2
drop _merge	

gen scite_per_bcite = nncit/(nbcites_pv +nncit) 

merge 1:1 gvkey year using "${MY_IN_PATH}/text_measures_20231226.dta" 
drop if _merge ==2
drop _merge	

replace avg_sim = 1 if avg_sim>1 & avg_sim<.

********************************************************************************
* Summary statistics alternative measures for Table 1
tabstat  avg_sim  pat_vs_sci scite_per_bcite if first_tax<=1991, stats(mean sd) columns(stats) format(%9.3f)
tabstat  avg_sim  pat_vs_sci scite_per_bcite  if treat_before==1 & first_tax<=1991, stats(mean sd) columns(stats) format(%9.3f)
tabstat  avg_sim  pat_vs_sci scite_per_bcite  if treat_after==1 & first_tax<=1991, stats(mean sd) columns(stats) format(%9.3f)
tabstat  avg_sim  pat_vs_sci scite_per_bcite  if treatgroup==0 & first_tax<=1991, stats(mean sd) columns(stats) format(%9.3f)
********************************************************************************

********************************************************************************
* Table 6, Panel A:  continuous R&D tax credit exposure with alternative outcomes
********************************************************************************

reghdfe avg_sim l1rd_efr_hi if first_tax<=1991  ,  absorb(gvkey year) cluster(sid)					
estimates store a
scalar ra_`y' = e(r2_p)
reghdfe pat_vs_sci l1rd_efr_hi if first_tax<=1991  ,  absorb(gvkey year) cluster(sid)
estimates store b
scalar rb_`y' = e(r2_p)
reghdfe scite_per_bcite l1rd_efr_hi if first_tax<=1991  ,  absorb(gvkey year) cluster(sid)
est store c
scalar rc_`y' = e(r2_p)

esttab a b c , replace r2 se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01) scalars(N_full) ///
					keep (l1rd_efr_hi) sfmt(%10.0f) ///
					title("Tax credits, alternative patent measures, Poisson / exponential mean regression")  label noobs	type nogap compress noeqli addn("Standard errors clustered at the state level" "All regression include firm and year fixed effecs") coeflabels(l1rd_efr_hi "Tax credit t-1") 				
					

********************************************************************************
* Table 6, Panel B:  binary R&D tax credit exposure, alternative outcomes Wooldridge estimator
********************************************************************************					

 qui foreach y in  avg_sim  pat_vs_sci scite_per_bcite  {
 ivreghdfe `y'    $varlistpostl  if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0) , ///
 absorb(gvkey year) cluster (sid)  

scalar rb_`y' = e(r2_p)
capture drop wsample
gen wsample = e(sample)
 
 qui{
count if wsample==1 & year>$cohort2 & $cohort2!=0
local N = r(N)
local att 0
foreach v in  $vpostl  {
count if wsample==1 & `v'==1 & year>$cohort2 & $cohort2!=0
levelsof $cohort2 if wsample==1 & year>$cohort2 & $cohort2!=0, local(level)
foreach w in `level' {
count if wsample ==1 & `v'==1 & $cohort2 ==`w'
local s = r(N)
local att `att' + _b[`w'.$cohort2#c.`v']*`s' /`N'
					}
						}
  }
nlcom (att: `att' ) , post
est store e_`y'					
}					
				
esttab e_avg_sim e_pat_vs_sci e_scite_per_bcite , replace  se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01)  noobs ///
					title("Panel A: Tax credits, R&D and patenting, Wooldridge estimator")  label	type nogap compress noeqli addn("Standard errors clustered at the state level" "All regression include firm and year fixed effects") coeflabels(l1rd_efr_hi "TaxCreditRate"  att "TaxCreditEvent" ) 				

* generate counts for number of observations and firms
foreach y in  avg_sim  pat_vs_sci scite_per_bcite  {
qui reg `y'  $varlistpostl  if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0) , cluster(sid)  
gen fullsample_a=e(sample)
egen Nsample = sum(fullsample_a)
qui sum Nsample
di "N  for `v' : `r(max)'"
egen id_b=group(gvkey ) if fullsample_a==1
qui sum(id_b)
di "N firms for `y' : `r(max)'"
drop Nsample fullsample_a  id_b
di "R2:" rb_`y' 
}						
					
restore 
		
		
*************************************************************************************************************************************
*  Figures: Event study graphs
*************************************************************************************************************************************
*************************************************************************************************************************************		
* Poisson event studies Wooldridge estimator
*************************************************************************************************************************************

global cohort first_taxB

global varlistpre
global varlistpost
global vpre
global vpost
global vpostl 
 
forvalues k = $first (-1)2 {
global varlistpre $varlistpre i.$cohort2#c.g_`k'
global vpre $vpre g_`k'
}

forvalues k = 0/ $last {
global varlistpost $varlistpost i.$cohort2#c.g`k'
global vpost $vpost g`k'
}

forvalues k = 1/ $last {
global vpostl $vpostl g`k'
}

*************************************************************************************************************************************					
*estimate cohort-year specific treatment effects
*************************************************************************************************************************************

qui ppmlhdfe xrd_d  $varlistpre  $varlistpost g_1 if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0)  & xrd_d<=1000  ///
, absorb(gvkey year) vce(cluster sid) keepsingle

capture drop wsample
gen wsample = e(sample)

qui {
local tatt 
foreach v in $vpre $vpost  {
local tatt `tatt' (`v': 0
count if wsample==1 & `v'==1
local N = r(N)
levelsof $cohort2 if wsample==1, local(level)
foreach w in `level' {
quietly count if wsample ==1 & `v'==1 & $cohort2 ==`w'
local s = r(N)
local tatt `tatt' + _b[`w'.$cohort2#c.`v']*`s'/`N'
					}
local tatt `tatt' )
							}
	}

nlcom `tatt' (g_1: 0 ), post
est store e_t

mat C1 = e(b)
mata st_matrix("A1",sqrt(diagonal(st_matrix("e(V)"))))
mat C1 = C1 \ A1'
mat list C1
local x = $first + 0.5
coefplot matrix(C1[1]), se(C1[2]) yline(0) xline(`x') vertical ///
order( $vpre g_1 $vpost  ) omitted recast(connected) graphregion(color(white)) xtit("Year relative to tax credit introduction") ytit("Change in R&D") levels(95)
graph export "${MY_OUT_PATH}/figures/poisson_rd.png", replace


qui ppmlhdfe npat_alt_1000  $varlistpre  $varlistpost g_1 if first_tax<=1991  & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0)  ///
, absorb($idcode $year) vce(cluster $cluster) keepsingle

capture drop wsample
gen wsample = e(sample)

qui {
local tatt 
foreach v in $vpre $vpost  {
local tatt `tatt' (`v': 0
count if wsample==1 & `v'==1
local N = r(N)
levelsof $cohort2 if wsample==1, local(level)
foreach w in `level' {
quietly count if wsample ==1 & `v'==1 & $cohort2 ==`w'
local s = r(N)
local tatt `tatt' + _b[`w'.$cohort2#c.`v']*`s'/`N'
						}
local tatt `tatt' )
							}
	}

nlcom `tatt' (g_1: 0 ), post
est store e_t

mat C1 = e(b)
mata st_matrix("A1",sqrt(diagonal(st_matrix("e(V)"))))
mat C1 = C1 \ A1'
mat list C1
local x = $first + 0.5
coefplot matrix(C1[1]), se(C1[2]) yline(0) xline(`x') vertical ///
order( $vpre g_1 $vpost  ) omitted recast(connected) graphregion(color(white)) xtit("Year relative to tax credit introduction") ytit("Change in patents") levels(95)
graph export "${MY_OUT_PATH}/figures/poisson_patall.png", replace


qui ppmlhdfe no_patold_cl_1000  $varlistpre  $varlistpost g_1 if first_tax<=1991  & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0)  ///
, absorb(gvkey year) vce(cluster sid) keepsingle

capture drop wsample
gen wsample = e(sample)

qui {
local tatt 
foreach v in $vpre $vpost  {
local tatt `tatt' (`v': 0
count if wsample==1 & `v'==1
local N = r(N)
levelsof $cohort2 if wsample==1, local(level)
foreach w in `level' {
quietly count if wsample ==1 & `v'==1 & $cohort2 ==`w'
local s = r(N)
local tatt `tatt' + _b[`w'.$cohort2#c.`v']*`s'/`N'
						}
local tatt `tatt' )
							}
	}

nlcom `tatt' (g_1: 0 ), post
est store e_t

mat C1 = e(b)
mata st_matrix("A1",sqrt(diagonal(st_matrix("e(V)"))))
mat C1 = C1 \ A1'
mat list C1
local x = $first + 0.5
coefplot matrix(C1[1]), se(C1[2]) yline(0) xline(`x') vertical ///
order( $vpre g_1 $vpost  ) omitted recast(connected) graphregion(color(white)) xtit("Year relative to tax credit introduction") ytit("Change in patents old technologies") levels(95)
graph export "${MY_OUT_PATH}/figures/poisson_patold.png", replace

qui ppmlhdfe npat_new5_50_2020  $varlistpre  $varlistpost g_1 if first_tax<=1991  & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0)  ///
, absorb(gvkey year) vce(cluster sid) keepsingle

capture drop wsample
gen wsample = e(sample)

qui {
local tatt 
foreach v in $vpre $vpost  {
local tatt `tatt' (`v': 0
count if wsample==1 & `v'==1
local N = r(N)
levelsof $cohort2 if wsample==1, local(level)
foreach w in `level' {
quietly count if wsample ==1 & `v'==1 & $cohort2 ==`w'
local s = r(N)
local tatt `tatt' + _b[`w'.$cohort2#c.`v']*`s'/`N'
						}
local tatt `tatt' )
							}
	}

nlcom `tatt' (g_1: 0 ), post
est store e_t
esttab e_t, b(3) se(3) starlevels(* 0.10 ** 0.05 *** 0.01) noobs 

mat C1 = e(b)
mata st_matrix("A1",sqrt(diagonal(st_matrix("e(V)"))))
mat C1 = C1 \ A1'
mat list C1
local x = $first + 0.5
coefplot matrix(C1[1]), se(C1[2]) yline(0) xline(`x') vertical ///
order( $vpre g_1 $vpost  ) omitted recast(connected) graphregion(color(white)) xtit("Year relative to tax credit introduction") ytit("Change in patents new technologies") levels(95)
graph export "${MY_OUT_PATH}/figures/poisson_patnew.png", replace



*************************************************************************************************************************************
* APPENDIX FIGURES
*************************************************************************************************************************************	
* Wooldridge event study graphs: linear specifications based on Sun & Abraham
*************************************************************************************************************************************

capture drop first_tax_m
gen first_tax_m = first_taxB
recode first_tax_m(0=.)
capture drop ry
gen ry = year - first_tax_m

forvalues k = 1/5 {
capture drop g_`k'
gen g_`k' = ry == -1 * `k'
la var g_`k' "-`k'"
}

forvalues k = 0/15 {
capture drop g`k'
gen g`k' = ry == `k'
la var g`k' "`k'"
}

capture drop _ntreat never_treat
gen _ntreat = first_tax_m==.
egen never_treat = max(_ntreat), by(gvkey)

eventstudyinteract  xrd_d   g_5 g_4 g_3 g_2  g0 g1 g2 g3 g4 g5 g6 g7 g8 g9 g10 g11 g12 g13 g14 g15 g_1 if ((year<=first_taxB+15 & year>=first_taxB-5) | first_taxB==0) & first_taxB<=1991 & xrd_d<=1000 , cohort(first_tax_m) control_cohort(never_treat)  absorb(i.gvkey i.year) vce(cluster sid)
        matrix C1 = e(b_iw)
        mata st_matrix("A1",sqrt(st_matrix("e(V_iw)")))
        matrix C1 = C1 \ A1
        matrix list C1
        coefplot matrix(C1[1]), se(C1[2]) yline(0) xline(5.5) vertical ///
	order(  g_5 g_4 g_3 g_2 g_1 g0 g1 g2 g3 g4 g5 g6 g7 g8 g9 g10 g11 g12 g13 g14 g15  ) omitted recast(connected) graphregion(color(white)) xtit("Year relative to tax credit introduction") ytit("Change in R&D") levels(95)
graph export "${MY_OUT_PATH}/figures/rd_levels.png", replace	


eventstudyinteract  npat_alt_1000  g_5 g_4 g_3 g_2  g0 g1 g2 g3 g4 g5 g6 g7 g8 g9 g10 g11 g12 g13 g14 g15 g_1 if ((year<=first_taxB+15 & year>=first_taxB-5) | first_taxB==0) & first_taxB<=1991 , cohort(first_tax_m) control_cohort(never_treat)  absorb(i.gvkey i.year) vce(cluster sid)
        matrix C1 = e(b_iw)
        mata st_matrix("A1",sqrt(st_matrix("e(V_iw)")))
        matrix C1 = C1 \ A1
        matrix list C1
        coefplot matrix(C1[1]), se(C1[2]) yline(0) xline(5.5) vertical ///
	order( g_5 g_4 g_3 g_2 g_1 g0 g1 g2 g3 g4 g5 g6 g7 g8 g9 g10 g11 g12 g13 g14 g15 ) omitted recast(connected) graphregion(color(white)) xtit("Year relative to tax credit introduction") ytit("Change in Patents All Technologies") levels(95)
graph export "${MY_OUT_PATH}/figures/patall_levels.png", replace	

eventstudyinteract  no_patold_cl_1000  g_5 g_4 g_3 g_2  g0 g1 g2 g3 g4 g5 g6 g7 g8 g9 g10 g11 g12 g13 g14 g15 g_1 if ((year<=first_taxB+15 & year>=first_taxB-5) | first_taxB==0) & first_taxB<=1991 , cohort(first_tax_m) control_cohort(never_treat)  absorb(i.gvkey i.year) vce(cluster sid)
        matrix C2 = e(b_iw)
        mata st_matrix("A2",sqrt(st_matrix("e(V_iw)")))
        matrix C2 = C2 \ A2
        matrix list C2
        coefplot matrix(C2[1]), se(C2[2]) yline(0) xline(5.5) vertical ///
	order( g_5 g_4 g_3 g_2 g_1 g0 g1 g2 g3 g4 g5 g6 g7 g8 g9 g10  g11 g12 g13 g14 g15) omitted recast(connected) graphregion(color(white)) xtit("Year relative to tax credit introduction") ytit("Change in Patents Known Technologies") levels(95)
graph export "${MY_OUT_PATH}/figures/patold_levels.png", replace	

eventstudyinteract  npat_new5_50_2020 g_5  g_4 g_3 g_2  g0 g1 g2 g3 g4 g5 g6 g7 g8 g9 g10 g11 g12 g13 g14 g15 g_1 if ((year<=first_taxB+15 & year>=first_taxB-5) | first_taxB==0) & first_taxB<=1991 , cohort(first_tax_m) control_cohort(never_treat)  absorb(i.gvkey i.year) vce(cluster sid)
        matrix C3 = e(b_iw)
        mata st_matrix("A3",sqrt(st_matrix("e(V_iw)")))
        matrix C3 = C3 \ A3
        matrix list C3
        coefplot matrix(C3[1]), se(C3[2]) yline(0) xline(5.5) vertical ///
	order( g_5 g_4 g_3 g_2 g_1 g0 g1 g2 g3 g4 g5 g6 g7 g8 g9 g10 g11 g12 g13 g14 g15 ) omitted recast(connected) graphregion(color(white)) xtit("Year relative to tax credit introduction") ytit("Change in Patents New Technologies") levels(95)
graph export "${MY_OUT_PATH}/figures/patnew_levels.png", replace	


log close
 
exit 



					